library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.0 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.0
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the survey records used by every plot below into a tibble.
surveys_complete <- read_csv(
  file = "~/Bio 676/Lab6/data/surveys_complete.csv"
)
## Rows: 30463 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): species_id, sex, genus, species, taxa, plot_type
## dbl (7): record_id, month, day, year, plot_id, hindfoot_length, weight
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Step 1: bind only the data frame; no aesthetics or layers yet.
ggplot(surveys_complete)

# Step 2: map weight to x and hindfoot_length to y (still no layer).
ggplot(
  surveys_complete,
  aes(x = weight, y = hindfoot_length)
)

# Step 3: add a point layer so each record is drawn.
ggplot(surveys_complete, aes(weight, hindfoot_length)) +
  geom_point()
# Keep the data + aesthetic mapping in one object so layers can be added later.
surveys_plot <- ggplot(
  surveys_complete,
  aes(x = weight, y = hindfoot_length)
)

# Draw the stored template with a point layer.
surveys_plot +
  geom_point()

# The same scatter plot written out in full, without the template.
ggplot(surveys_complete, aes(x = weight, y = hindfoot_length)) +
  geom_point()
# Mostly transparent points (alpha = 0.1).
ggplot(surveys_complete, aes(x = weight, y = hindfoot_length)) +
  geom_point(alpha = 0.1)

# Same transparency, with every point set to a fixed blue.
ggplot(surveys_complete, aes(x = weight, y = hindfoot_length)) +
  geom_point(color = "blue", alpha = 0.1)

# Colour mapped from the species_id column instead of a fixed value.
ggplot(surveys_complete, aes(x = weight, y = hindfoot_length)) +
  geom_point(mapping = aes(color = species_id), alpha = 0.1)
# Box plot of weight for each species_id.
ggplot(surveys_complete, aes(x = species_id, y = weight)) +
  geom_boxplot()

# Box plot with outlier markers suppressed, then jittered raw points on top.
ggplot(surveys_complete, aes(x = species_id, y = weight)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(color = "tomato", alpha = 0.3)
# Tally records per year and genus; the tally column is `n`.
yearly_counts <- count(surveys_complete, year, genus)

# Line graph of the counts with no grouping.
ggplot(yearly_counts, aes(x = year, y = n)) +
  geom_line()

# One line per genus via the group aesthetic.
ggplot(yearly_counts, aes(x = year, y = n, group = genus)) +
  geom_line()

# One line per genus via the color aesthetic.
ggplot(yearly_counts, aes(x = year, y = n, color = genus)) +
  geom_line()
# Pipe the prepared counts straight into ggplot() as its data argument.
yearly_counts %>%
  ggplot(aes(x = year, y = n, color = genus)) +
  geom_line()

# Do the counting and the plotting in a single pipeline, keeping the plot object.
yearly_counts_graph <- surveys_complete %>%
  count(year, genus) %>%
  ggplot(aes(x = year, y = n, color = genus)) +
  geom_line()

# Display the stored plot.
yearly_counts_graph
# Faceting (subplots): one panel per genus.
ggplot(yearly_counts, aes(x = year, y = n)) +
  geom_line() +
  facet_wrap(vars(genus))
# Tally records per year, genus and sex.
yearly_sex_counts <- count(surveys_complete, year, genus, sex)

# One panel per genus, with a separately coloured line for each sex.
ggplot(yearly_sex_counts, aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_wrap(vars(genus))

# Grid of panels: sex along the rows, genus along the columns.
ggplot(yearly_sex_counts, aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_grid(rows = vars(sex), cols = vars(genus))

# Grid with rows only (one row per genus).
ggplot(yearly_sex_counts, aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_grid(rows = vars(genus))

# Grid with columns only (one column per genus).
ggplot(yearly_sex_counts, aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_grid(cols = vars(genus))
# Apply the built-in black-and-white theme.
ggplot(yearly_sex_counts, aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_wrap(vars(genus)) +
  theme_bw()

# Add a title and axis labels.
ggplot(yearly_sex_counts, aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_wrap(vars(genus)) +
  labs(
    title = "Observed genera through time",
    x = "Year of observation",
    y = "Number of individuals"
  ) +
  theme_bw()

# Set the text size to 16 through theme().
ggplot(yearly_sex_counts, aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_wrap(vars(genus)) +
  labs(
    title = "Observed genera through time",
    x = "Year of observation",
    y = "Number of individuals"
  ) +
  theme_bw() +
  theme(text = element_text(size = 16))

# Final formatting: rotated x tick labels, grey axis text, italic facet strips.
ggplot(yearly_sex_counts, aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_wrap(vars(genus)) +
  labs(
    title = "Observed genera through time",
    x = "Year of observation",
    y = "Number of individuals"
  ) +
  theme_bw() +
  theme(
    text = element_text(size = 16),
    axis.text.x = element_text(
      color = "grey20", size = 12, angle = 90, hjust = 0.5, vjust = 0.5
    ),
    axis.text.y = element_text(color = "grey20", size = 12),
    strip.text = element_text(face = "italic")
  )
# Store the text/axis settings as a reusable theme object.
grey_theme <- theme(
  text = element_text(size = 16),
  axis.text.x = element_text(
    color = "grey20", size = 12, angle = 90, hjust = 0.5, vjust = 0.5
  ),
  axis.text.y = element_text(color = "grey20", size = 12)
)

# Reuse the stored theme on a different plot.
ggplot(surveys_complete, aes(species_id, hindfoot_length)) +
  geom_boxplot() +
  grey_theme
# Install patchwork only when it is missing, so re-running the script does not
# reinstall the package (or need network access) every time.
if (!requireNamespace("patchwork", quietly = TRUE)) {
  install.packages("patchwork")
}
## Installing package into '/home/jmbauer_umass_edu/R/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
library(patchwork)
# Box plot of weight per species, with the y axis on a log10 scale.
plot_weight <- ggplot(surveys_complete, aes(x = species_id, y = weight)) +
  geom_boxplot() +
  scale_y_log10() +
  labs(x = "Species", y = expression(log[10](Weight)))

# Yearly record counts, one coloured line per genus.
plot_count <- ggplot(yearly_counts, aes(x = year, y = n, color = genus)) +
  geom_line() +
  labs(x = "Year", y = "Abundance")

# Stack the two plots vertically with a 3:2 height ratio (patchwork).
(plot_weight / plot_count) +
  plot_layout(heights = c(3, 2))
Example #7
# Build the formatted genus-by-sex time series and keep it so it can be saved.
my_plot <- ggplot(yearly_sex_counts, aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_wrap(vars(genus)) +
  labs(
    title = "Observed genera through time",
    x = "Year of observation",
    y = "Number of individuals"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(
      color = "grey20", size = 12, angle = 90, hjust = 0.5, vjust = 0.5
    ),
    # `size` is spelled out in full: the previous `siz` only worked through
    # R's partial argument matching.
    axis.text.y = element_text(color = "grey20", size = 12),
    text = element_text(size = 16)
  )

# Write the plot to disk at 15 x 10 (ggsave's default units).
ggsave("myplot.png", my_plot, width = 15, height = 10)
## Plots combined with patchwork can be saved the same way.
plot_combined <- (plot_weight / plot_count) +
  plot_layout(heights = c(3, 2))
ggsave(
  filename = "plot_combined.png",
  plot = plot_combined,
  width = 10,
  dpi = 300
)
## Saving 10 x 5 in image
#Scatter plots can be useful exploratory tools for small datasets. For data sets with large numbers of observations, such as the surveys_complete data set, overplotting of points can be a limitation of scatter plots. One strategy for handling such settings is to use hexagonal binning of observations. The plot space is tessellated into hexagons. Each hexagon is assigned a color based on the number of observations that fall within its boundaries. To use hexagonal binning with ggplot2, first install the R package hexbin from CRAN:
# Install hexbin only when it is missing, so re-running the script does not
# attempt a fresh installation every time.
if (!requireNamespace("hexbin", quietly = TRUE)) {
  install.packages("hexbin")
}
## Installing package into '/home/jmbauer_umass_edu/R/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
## Warning in install.packages("hexbin"): installation of package 'hexbin' had
## non-zero exit status
library(hexbin)
#Then use the geom_hex() function:
# Add a hexagonal-bin layer to the stored weight / hindfoot_length template.
surveys_plot +
  geom_hex()
What are the relative strengths and weaknesses of a hexagonal bin plot
compared to a scatter plot? Examine the above scatter plot and compare
it with the hexagonal bin plot that you created.
Strengths: Simplifies the data presentation, making overall trends more evident. Weaknesses: With less specificity, it's harder to make direct connections to individual data points.
#Use what you just learned to create a scatter plot of weight over species_id with the plot types showing in different colors. Is this a good way to show this type of data?
# Weight by species as points, coloured by plot_type.
ggplot(surveys_complete, aes(x = species_id, y = weight)) +
  geom_point(mapping = aes(color = plot_type), alpha = 0.1)
# Boxplots are useful summaries, but hide the shape of the distribution. For example, if there is a bimodal distribution, it would not be observed with a boxplot. An alternative to the boxplot is the violin plot (sometimes known as a beanplot), where the shape (of the density of points) is drawn.
# Replace the box plot with a violin plot; see geom_violin().
# Jittered raw points first, then the violin layer drawn over them.
ggplot(surveys_complete, aes(x = species_id, y = weight)) +
  geom_jitter(color = "tomato", alpha = 0.3) +
  geom_violin()
#In many types of data, it is important to consider the scale of the observations. For example, it may be worth changing the scale of the axis to better distribute the observations in the space of the plot. Changing the scale of the axes is done similarly to adding/modifying other components (i.e., by incrementally adding commands). Try making these modifications:
# Represent weight on the log10 scale; see scale_y_log10().
# Same jitter + violin plot, with weight shown on a log10 y axis.
ggplot(surveys_complete, aes(x = species_id, y = weight)) +
  geom_jitter(color = "tomato", alpha = 0.3) +
  geom_violin() +
  scale_y_log10()
#So far, we’ve looked at the distribution of weight within species. Try making a new plot to explore the distribution of another variable within each species.
#create boxplot for hindfoot_length. Overlay the boxplot layer on a jitter layer to show actual measurements.
# Hindfoot length per species: jittered measurements with a box plot drawn on
# top, on a log10 y axis.
ggplot(surveys_complete, aes(x = species_id, y = hindfoot_length)) +
  # The colour is a fixed value, so it is set outside aes(). Inside aes() the
  # string "tomato" was treated as data and mapped through the colour scale,
  # which added a legend and did not draw the points in tomato.
  geom_jitter(alpha = 0.3, color = "tomato") +
  geom_boxplot(outlier.shape = NA) +
  scale_y_log10()
#Add color to the data points on your boxplot according to the plot from which the sample was taken (plot_id).
#Hint: Check the class for plot_id. Consider changing the class of plot_id from integer to factor. Why does this change how R makes the graph?
# Colour the jittered points by plot_id, converted to a factor so each plot
# gets its own discrete colour.
ggplot(surveys_complete, aes(x = species_id, y = hindfoot_length)) +
  geom_jitter(mapping = aes(color = factor(plot_id)), alpha = 0.3) +
  geom_boxplot(outlier.shape = NA) +
  scale_y_log10()
#Use what you just learned to create a plot that depicts how the average weight of each species changes through the years.
# Mean weight for each species in each year.
# Rows with a missing weight are removed before grouping. `.groups = "drop"`
# returns an ungrouped tibble, so later verbs are not silently applied per
# species and summarise() no longer emits its grouping message.
surveys_average_weight <- surveys_complete %>%
  filter(!is.na(weight)) %>%
  group_by(species_id, year) %>%
  summarize(weight_average = mean(weight), .groups = "drop")
## `summarise()` has grouped output by 'species_id'. You can override using the
## `.groups` argument.
# Average weight through the years, one panel per species.
ggplot(surveys_average_weight, aes(x = year, y = weight_average)) +
  geom_line() +
  facet_wrap(vars(species_id)) +
  theme_bw()
#With all of this information in hand, please take another five minutes to either improve one of the plots generated in this exercise or create a beautiful graph of your own. Use the RStudio ggplot2 cheat sheet for inspiration.
#Here are some ideas:
# See if you can change the thickness of the lines.
# Can you find a way to change the name of the legend? What about its labels?
# Try using a different color palette (see https://r-graphics.org/chapter-colors).
library(viridis)
## Loading required package: viridisLite
# Thicker lines, black axis text, a bold red legend title and italic facet
# strips on the genus-by-sex time series.
plot <- ggplot(yearly_sex_counts, aes(x = year, y = n, color = sex)) +
  geom_line(linewidth = 2) +
  facet_wrap(vars(genus)) +
  labs(
    title = "Observed genera through time",
    x = "Year of observation",
    y = "Number of individuals"
  ) +
  theme_bw() +
  theme(
    text = element_text(size = 16),
    axis.text.x = element_text(
      color = "black", size = 12, angle = 90, hjust = 0.5, vjust = 0.5
    ),
    axis.text.y = element_text(color = "black", size = 12),
    legend.title = element_text(color = "red", face = "bold"),
    strip.text = element_text(face = "italic")
  )

# Swap the default colours for the discrete viridis palette.
plot +
  scale_color_viridis(discrete = TRUE)